29 July 2018

Loading the data

Let's load a dataset on the population of Bilbao in 2018.

# Fetch the 2018 Bilbao population-by-neighborhood CSV once and cache it as
# "neighborhoods.csv" in the working directory.
#
# The transfer goes to a temporary file first and is copied into place only
# after download.file() reports success. Downloading directly to the cache
# path could leave a partial or empty file behind after a failed transfer,
# and the file.exists() guard would then skip the download on later runs.
if (!file.exists("neighborhoods.csv")) {
    local({
        tmp <- tempfile(fileext = ".csv")
        # Always remove the temporary file, whether or not the download worked.
        on.exit(unlink(tmp), add = TRUE)
        # download.file() returns 0 on success and non-zero on failure
        # (some methods raise an error instead, which also aborts here).
        status <- download.file(
            "http://www.bilbao.eus/bilbaoopendata/demografia/numero_habitantes_barrio_sexo_2018.csv",
            destfile = tmp
        )
        if (status != 0 || !file.copy(tmp, "neighborhoods.csv")) {
            stop("Could not download neighborhoods.csv", call. = FALSE)
        }
    })
}

# Parse the cached semicolon-delimited file, keeping text columns as
# character vectors instead of converting them to factors.
population <- read.csv(
    file = "neighborhoods.csv",
    sep = ";",
    encoding = "bytes",
    stringsAsFactors = FALSE
)
# Show the size of the raw table (rows, columns).
dim(population)
## [1] 93  5

Preprocessing

# Keep only the rows that have a neighborhood code and whose neighborhood
# name does not start with "DISEMINADO".
population <- population[
    !(is.na(population$COD..BARRIO) |
          grepl("^DISEMINADO", population$BARRIO)),
]

library(dplyr)
# Rebuild the table with English column names:
#   neighborhoodCode - the neighborhood code as a factor of integer values
#   neighborhood     - the neighborhood name as character
#   sex              - SEXO with "HOMBRES" -> "male" and "MUJERES" -> "female"
#                      (first match per string, case-insensitive)
#   population       - the TOTAL column
population <- transmute(
    population,
    neighborhoodCode = factor(as.integer(COD..BARRIO)),
    neighborhood = as.character(BARRIO),
    sex = sub(
        "MUJERES", "female",
        sub("HOMBRES", "male", SEXO, ignore.case = TRUE),
        ignore.case = TRUE
    ),
    population = TOTAL
)
library(tidyr)
# Go from one row per (neighborhood, sex) to one row per neighborhood, with
# one column per distinct value of `sex` holding the population count.
population <- spread(population, key = sex, value = population)

Population of Bilbao by neighborhood